R graph library: https://rmarkdown.rstudio.com/authoring_quick_tour.html
Why ggplot2? ggplot2 is an R package dedicated to data visualization. It can greatly improve the quality and aesthetics of your graphics, and will make you much more efficient in creating them. ggplot2 allows you to build almost any type of chart.
create a project: organizes all files (data, output, scripts) related to a specific analysis within a dedicated folder, automatically setting the working directory to that folder when you open the project
you can set working directory: setting a working directory simply specifies the current location where R will look for files to read or save, without necessarily creating a structured project folder
#setwd("path/to/project")
ggplot: three main components:
ggplot()
aes() aes = aesthetics (what to plot)
geom_...
First, load packages we’ll be using.
# most simple to load the whole tidyverse
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# if you're having trouble, these should be all you need:
#library(ggplot2)
#library(tidyr)
Next, load the data we’ll be working with today (courtesy of Janani Ravi and Arjun Krishnan).
# Load the gene-location table from the data folder.
# header = TRUE: the first line of the file supplies the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of this call.
gene_loc <- read.table("GSE69360.gene-locations.txt",
                       header = TRUE)
Quick inspection of the dataset.
# get column names
colnames(gene_loc)
## [1] "Geneid" "Chr" "Start" "End" "Strand" "Length"
# see the first 6 rows (head() shows 6 rows by default)
head(gene_loc)
## Geneid Chr Start End Strand Length
## 1 ENSG00000223972 chr1 11869 12227 + 1756
## 2 ENSG00000227232 chr1 14363 14829 - 2073
## 3 ENSG00000243485 chr1 29554 30039 + 1021
## 4 ENSG00000237613 chr1 34554 35174 - 1219
## 5 ENSG00000268020 chr1 52473 53312 + 947
## 6 ENSG00000240361 chr1 62948 63887 + 940
# see the last 6 rows (tail() shows 6 rows by default)
tail(gene_loc)
## Geneid Chr Start End Strand Length
## 57815 ENSG00000198786 chrM 12337 14148 + 1812
## 57816 ENSG00000198695 chrM 14149 14673 - 525
## 57817 ENSG00000210194 chrM 14674 14742 - 69
## 57818 ENSG00000198727 chrM 14747 15887 + 1141
## 57819 ENSG00000210195 chrM 15888 15953 + 66
## 57820 ENSG00000210196 chrM 15956 16023 - 68
Creating a plot w/ Grammar of Graphics
ggplot, factor, aes, geom_bar, geom_histogram, facet_wrap, scale_x_log10, labs,
coord_flip, theme, theme_minimal
gene_loc %>% # data
ggplot(aes(x = Chr)) + # aesthetics: what to plot?
geom_bar() # geometry: how to plot?
This looks messy. The names are overlapping, and the order is not correct. Let’s try making some adjustments.
# Convert the chromosome names to a factor with explicitly ordered levels
# (chr1 ... chr22, chrX, chrY, chrM) so that plots follow this order.
# paste0() is the idiomatic form of paste(..., sep = "").
# Note: any value of Chr that is not one of these levels becomes NA.
gene_loc$Chr <- factor(gene_loc$Chr,
                       levels = paste0("chr", c(1:22, "X", "Y", "M")))
# A ggplot can be stored in a variable and reused or extended later.
# Bar chart of the number of rows (genes) for each value of Chr.
plot_chr_numgenes <- ggplot(data = gene_loc, mapping = aes(x = Chr)) +
  geom_bar()
# evaluating the stored object draws the plot
plot_chr_numgenes
Still hard to read, so let’s try rotating the axes.
plot_chr_numgenes + # load the previously-named ggplot object
coord_flip() + # rotate the axes
theme_minimal() # change to a white background
Let’s say we want the order to be reversed. We can also do that here.
plot_chr_numgenes +
coord_flip() +
theme_minimal() +
scale_x_discrete(limits = rev(levels(gene_loc$Chr))) # change the order
Label the axes and assign a title
plot_chr_numgenes +
labs(title = "No. genes per chromosome",
x = "Chromosome",
y = "No. of genes") +
theme_minimal() +
coord_flip()+
scale_x_discrete(limits = rev(levels(gene_loc$Chr)))
We can also make other geometries, such as a histogram.
# Histogram of gene lengths, drawn on a log10 x-axis.
gene_loc %>%
ggplot(aes(x = Length)) +
geom_histogram(color = "white") + # color here is the outline of the bars (the bar interior is set with fill)
scale_x_log10() + # put the x-axis (Length) on a log10 scale
theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Next, we can separate the histograms into facets by chromosome name.
# Draw one histogram panel per chromosome using `facet_wrap()`,
# and keep the result as a new ggplot object.
plot_chr_genelength <- ggplot(gene_loc, aes(x = Length, fill = Chr)) +
  geom_histogram(color = "white") +
  scale_x_log10() +
  theme_minimal() +
  facet_wrap(
    ~Chr,              # one panel for each level of Chr
    scales = "free_y"  # only the y-axis may differ between panels
  )
# display the stored plot
plot_chr_genelength
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Because each facet panel is already labeled, we do not need to have a
legend. We can also rotate the x-axis text using theme() and
element_text().
# remove legend and add labels
plot_chr_genelength +
theme(legend.position = "none") +
labs(x = "Gene length (log-scale)",
y = "No. of genes") +
theme(axis.text.x = element_text(angle = 45, hjust = .75))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Notice that colors can be changed globally (i.e. all one color), or at the data level (factors, values).
For example:
fill here is at the data level (each facet has its own
color).
gene_loc %>%
ggplot(aes(x = Length, fill = Chr)) + # FILL is at the DATA level
geom_histogram(color = "white") +
scale_x_log10() +
theme_minimal() +
facet_wrap(~Chr, scales = "free_y") +
theme(legend.position = "none") +
labs(x = "Gene length (log-scale)",
y = "No. of genes") +
theme(axis.text.x = element_text(angle = 45, hjust = .75))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
fill here is at the global level (all facets colored the
same).
gene_loc %>%
ggplot(aes(x = Length)) + # FILL at the DATA level is REMOVED
geom_histogram(color = "white", fill="blue") + # FILL is now GLOBAL
scale_x_log10() +
theme_minimal() +
facet_wrap(~Chr, scales = "free_y") +
theme(legend.position = "none") +
labs(x = "Gene length (log-scale)",
y = "No. of genes") +
theme(axis.text.x = element_text(angle = 45, hjust = .75))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
You can use scales to customize your colors. Or, you can
use them to match the level or categorical feature you mention in
aes(fill=...) or aes(color=...). In this
sense, scales “talk” to the aesthetics functions and work
hand-in-hand.
Here are some to consider:
To use them, you need color palettes, such as these below:
Color palettes from http://colorbrewer2.org (made by a geography student at MSU!)
# load colorbrewer library
library(RColorBrewer)
Check out the colors that they offer.
# show all colors
display.brewer.all()
You can change the number according to how many colors you’d like to use to see what’s available.
# show side-by-side
par(mfrow=c(1,2))
# show 3 colors
display.brewer.all(3)
# show for 10 colors
display.brewer.all(10)
Quick example using scale_fill_manual():
# Because we have more data levels than the colorbrewer colors, make a palette
num_cols <- length(levels(gene_loc$Chr)) #get number of colors needed
newcolors <- colorRampPalette(brewer.pal(10, "BrBG"))(num_cols) #select palette
# make a new ggplot object with the basics (going to use later)
num_genes_chr <- gene_loc %>%
ggplot(aes(x = Length, fill = Chr)) + # FILL is at the DATA level
geom_histogram(color = "white") +
scale_x_log10() +
theme_minimal() +
facet_wrap(~Chr, scales = "free_y") +
theme(legend.position = "none") +
labs(x = "Gene length (log-scale)",
y = "No. of genes") +
theme(axis.text.x = element_text(angle = 45, hjust = .75))
# add custom colors
num_genes_chr +
scale_fill_manual(values = newcolors) # try values=rev(newcolors) too!
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Wes Anderson palettes, from the Tumblr blog, https://wesandersonpalettes.tumblr.com/.
# load library
library(wesanderson)
# get names of movie color themes
names(wes_palettes)
## [1] "BottleRocket1" "BottleRocket2" "Rushmore1"
## [4] "Rushmore" "Royal1" "Royal2"
## [7] "Zissou1" "Zissou1Continuous" "Darjeeling1"
## [10] "Darjeeling2" "Chevalier1" "FantasticFox1"
## [13] "Moonrise1" "Moonrise2" "Moonrise3"
## [16] "Cavalcanti1" "GrandBudapest1" "GrandBudapest2"
## [19] "IsleofDogs1" "IsleofDogs2" "FrenchDispatch"
## [22] "AsteroidCity1" "AsteroidCity2" "AsteroidCity3"
# show side-by-side
par(mfrow=c(3,3))
# print out a few to see what they look like
wes_palette("FantasticFox1") #Fantastic Mr. Fox (2009)
wes_palette("Zissou1") #The Life Aquatic with Steve Zissou (2004)
wes_palette("GrandBudapest1") #The Grand Budapest Hotel (2014)
wes_palette("GrandBudapest2") #The Grand Budapest Hotel (2014)
wes_palette("Darjeeling1") #The Darjeeling Limited (2007)
wes_palette("Darjeeling2") #The Darjeeling Limited (2007)
wes_palette("Royal1") #The Royal Tenenbaums (2001)
wes_palette("Royal2") #The Royal Tenenbaums (2001)
wes_palette("Moonrise1") #Moonrise Kingdom (2012)
Quick example using one of these:
newcolors <- wes_palette(name = "Zissou1", #select one of the color names
n = num_cols,
type = "continuous") #as opposed to "discrete"
# add custom colors
num_genes_chr +
scale_fill_manual(values = newcolors)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# another way to do it is within scale_fill_manual
num_genes_chr +
scale_fill_manual(values = wes_palette(n=num_cols, name = 'Darjeeling1', type = "continuous"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Other color sources to check out
Color-blind-friendly colors from Paul Tol: https://personal.sron.nl/~pault/data/colourschemes.pdf
R color cheatsheet: https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/colorPaletteCheatsheet.pdf
Viridis color library:
#install.packages("viridis")
#library(viridis)
library(ggpubr)
library(ggrepel)
scatter <- ggplot(gene_loc, aes(x=End-Start, y=Length, group=Chr, color=Chr)) +
geom_point()
scatter
It is hard to visualize the entire data.
Let’s pretend we are only interested in a small set of chromosomes.
Let’s subset the data and add a few variables!
target <- c("chrX", "chrY", "chrM", "chr17")
gene_loc2 <- filter(gene_loc, Chr %in% target)
log_EndStart <- log10(gene_loc2$End-gene_loc2$Start)
log_length <- log10(gene_loc2$Length)
gene_loc2$log_length <- log_length
gene_loc2$log_EndStart <- log_EndStart
head(gene_loc2)
## Geneid Chr Start End Strand Length log_length log_EndStart
## 1 ENSG00000273288 chr17 4961 5048 - 88 1.944483 1.939519
## 2 ENSG00000272636 chr17 5810 6168 - 1480 3.170262 2.553883
## 3 ENSG00000273172 chr17 33615 34249 - 1185 3.073718 2.802089
## 4 ENSG00000181031 chr17 62293 63714 - 5953 3.774736 3.152594
## 5 ENSG00000262920 chr17 171183 171422 + 432 2.635484 2.378398
## 6 ENSG00000262061 chr17 180996 183279 + 2284 3.358696 3.358506
scatter <- ggplot(gene_loc2, aes(x = End-Start, y = Length, group=Chr, color=Chr)) +
geom_point()
scatter
scatter <- ggplot(gene_loc2, aes(x = End-Start, y = Length, group=Chr, color=Chr)) +
geom_point() +
theme_bw()
scatter
box1 <- ggplot(gene_loc2, aes(x = Chr, y = Length, group=Chr, color=Chr)) +
geom_boxplot() +
theme_bw()
box1
scatter <- ggplot(gene_loc2 ,aes(x = End-Start, y = Length, group=Chr, color=Chr)) +
geom_point() +
theme_bw() +
xlim(0, 2500)+ ylim(0, 10000)
scatter
## Warning: Removed 392 rows containing missing values or values outside the scale range
## (`geom_point()`).
scatter3 <- ggplot(gene_loc2 ,aes(x = End-Start, y = Length, group=Chr, color=Chr)) +
geom_point(alpha = 0.7, size =0.5) +
theme_bw() +
xlim(0, 2500)+ ylim(0, 10000)
scatter3
## Warning: Removed 392 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggarrange(scatter, scatter3,
labels = c("A", "B"),
ncol = 2, nrow = 1)
## Warning: Removed 392 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 392 rows containing missing values or values outside the scale range
## (`geom_point()`).
trans_scatter <- scatter +
scale_x_log10("End-Start") +
scale_y_log10("Gene length") +
theme_minimal()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
trans_scatter
scatter1 <- ggplot(gene_loc2, aes(x = log_EndStart, y = log_length, color=Chr)) +
geom_point() +
theme_bw() +
geom_smooth(method=lm, se=FALSE)
scatter1
## `geom_smooth()` using formula = 'y ~ x'
scatter2 <- ggplot(gene_loc2, aes(x = log_EndStart, y = log_length, color=Chr)) +
geom_point(size =1, alpha = 0.2) +
geom_smooth(method=lm, se=FALSE) +
theme_bw()
scatter2
## `geom_smooth()` using formula = 'y ~ x'
ggarrange(scatter1, scatter2,
labels = c("A", "B"),
ncol = 2, nrow = 1)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
scatter <- ggplot(gene_loc2, aes(x = log_EndStart, y = log_length, color = Chr))+
geom_point() +
theme_bw() +
geom_smooth(method = lm, se = FALSE)+
ggpubr::stat_cor()
scatter
## `geom_smooth()` using formula = 'y ~ x'
scatter <- ggplot(gene_loc2, aes(x = log_EndStart, y = log_length, color = Chr))+
geom_point() +
geom_smooth(method = lm, se = FALSE)+
ggpubr::stat_regline_equation()
scatter
## `geom_smooth()` using formula = 'y ~ x'
Your boss wants to see the lines in different plots!
Multiple regression lines, each with its equation and R², shown in separate panels
ml_scatter <- ggscatter(gene_loc2, x="log_EndStart", y="log_length",
color = "Chr", palette = "jco",
add = "reg.line", add.params = list(color = "black")) +
facet_wrap(~Chr) +
stat_cor(label.y = 4.4) +
stat_regline_equation(label.y = 4.2)
ml_scatter
scatter <- ggplot(gene_loc2 ,aes(x = End-Start, y = Length, group=Chr, color=Chr)) +
geom_point()
scatter
# Scatter plot of the subset, with every point labelled by its gene ID.
# The label is mapped inside aes() so it is taken from the plot's own data
# (gene_loc2) when the plot is drawn, instead of from a separately supplied
# vector that must be kept the same length and order as the data.
scatter <- ggplot(gene_loc2,
                  aes(x = End - Start, y = Length, group = Chr, color = Chr)) +
  geom_point() +
  geom_text(aes(label = Geneid), size = 2, color = "black")
scatter
a <- gene_loc %>%
group_by(Chr) %>%
summarize(meanLength = mean(Length), numGenes = n())
head(a)
## # A tibble: 6 × 3
## Chr meanLength numGenes
## <fct> <dbl> <int>
## 1 chr1 2258. 5363
## 2 chr2 2304. 4047
## 3 chr3 2382. 3101
## 4 chr4 2109. 2563
## 5 chr5 2188. 2859
## 6 chr6 2124. 2905
scatter2 <- ggplot(a, aes(x = numGenes, y = meanLength)) +
geom_point()+
theme_bw()
scatter2
# Per-chromosome summary scatter plot with each point labelled by chromosome.
# The label is mapped with aes(label = Chr), matching the geom_text_repel()
# versions of this plot, rather than passed as the external vector a$Chr.
scatter2 <- ggplot(a, aes(x = numGenes, y = meanLength)) +
  geom_point() +
  theme_bw() +
  geom_text(aes(label = Chr), size = 2, color = "black")
scatter2
scatter2 <- ggplot(a, aes(x = numGenes, y = meanLength)) +
geom_point()+
theme_bw()+
geom_text_repel(aes(label = Chr), color="red", segment.color="blue")
scatter2
scatter2 <- ggplot(a, aes(x = numGenes, y = meanLength)) +
geom_point()+
theme_bw()+
geom_text_repel(aes(label = Chr), color="red", segment.color="blue")+
geom_smooth(method = loess, color = "lightblue", alpha = 0.1)
scatter2
## `geom_smooth()` using formula = 'y ~ x'
Data import
Reading in a subset of the gene count data | Chr22
highvar_genes <- read_tsv("chr22-gene-logcpm.txt")
## Rows: 901 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Geneid, Sample, Source, Stage, Tissue
## dbl (1): Logcpm
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reshape the long table (one row per Geneid/Sample pair) into a wide
# data frame: one row per gene, one column per sample, Logcpm as the values,
# with the gene IDs moved into the row names.
# pivot_wider() replaces the superseded spread().
# NOTE(review): arrange(Geneid) and names_sort = TRUE are meant to keep the
# sorted row/column layout that spread() produced — confirm the heatmaps below
# show genes and samples in the same order as before.
highvar_genes_df <- highvar_genes %>%
  select(Geneid, Sample, Logcpm) %>%
  arrange(Geneid) %>%
  pivot_wider(names_from = Sample, values_from = Logcpm,
              names_sort = TRUE) %>%
  column_to_rownames(var = "Geneid") %>%
  as.data.frame()
library(pheatmap); library(RColorBrewer)
# Heatmap with row and column clustering switched off.
# TRUE/FALSE are spelled out; T/F are ordinary variables that can be reassigned.
pheatmap(highvar_genes_df, cluster_rows = FALSE, cluster_cols = FALSE)
# Same data with pheatmap's default settings.
pheatmap(highvar_genes_df)
# Default settings again, with a custom palette and cell-border colour.
pheatmap(highvar_genes_df,
         color = brewer.pal(n = 9, name = "YlGnBu"), # changing cell colors
         border_color = "grey90")                    # changing border colors
library(heatmap3)
heatmap3(highvar_genes_df)
heatmap3(highvar_genes_df,
col=brewer.pal(n=9, name="YlGn")) # go green!
Heatmap using ggplot and geom_tile
works with tidy data format
# Heatmap built from the tidy (long) table: one tile per Sample/Geneid pair,
# filled according to Logcpm.
highvar_genes$Sample <- factor(highvar_genes$Sample)
ggplot(data = highvar_genes,
       aes(x = Sample, y = Geneid, fill = Logcpm)) +
  # linewidth replaces size for line widths (size was deprecated for this
  # purpose in ggplot2 3.4.0)
  geom_tile(linewidth = 0.3) + ## to create heatmap
  scale_fill_gradient(low = "white", high = "darkred") + # color spec
  #scale_fill_distiller(palette = "YlGnBu") + # if using RColorBrewer
  scale_x_discrete(position = "top") + # draw the x-axis (sample names) on top
  theme_minimal() + #coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
can use hclust for clustering
can use ggdendro to add dendrograms
#install.packages("ggsci")
#install.packages("tidyverse")
#install.packages("ggpubr")
#install.packages("ggrepel")
#install.packages("cowplot")
#install.packages("gridExtra")
library(ggsci)
library(tidyverse)
library(ggpubr)
library(ggrepel)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:lubridate':
##
## stamp
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
Read the data
We’ll use the plots you’ve already created in part3: box1, ml_scatter, scatter2
## Data Import
# Re-read the gene-location table (first line = column names).
# This replaces the earlier gene_loc, so Chr is a plain character column
# again rather than the ordered factor created in the first part.
# `TRUE` is spelled out because `T` can be reassigned.
gene_loc <- read.table("GSE69360.gene-locations.txt",
                       header = TRUE)
## Plotting the data
scatter <- ggplot(gene_loc, aes(x=End-Start, y=Length, group=Chr, color=Chr)) +
geom_point()
scatter
It is hard to visualize the entire data.
Let’s pretend we are only interested in a small set of chromosomes.
Let’s subset the data and add a few variables!
target <- c("chrX", "chrY", "chrM", "chr17")
gene_loc2 <- filter(gene_loc, Chr %in% target)
log_EndStart <- log10(gene_loc2$End-gene_loc2$Start)
log_length <- log10(gene_loc2$Length)
gene_loc2$log_length <- log_length
gene_loc2$log_EndStart <- log_EndStart
head(gene_loc2)
## Geneid Chr Start End Strand Length log_length log_EndStart
## 1 ENSG00000273288 chr17 4961 5048 - 88 1.944483 1.939519
## 2 ENSG00000272636 chr17 5810 6168 - 1480 3.170262 2.553883
## 3 ENSG00000273172 chr17 33615 34249 - 1185 3.073718 2.802089
## 4 ENSG00000181031 chr17 62293 63714 - 5953 3.774736 3.152594
## 5 ENSG00000262920 chr17 171183 171422 + 432 2.635484 2.378398
## 6 ENSG00000262061 chr17 180996 183279 + 2284 3.358696 3.358506
– Combine boxplot and regression plot previously created
### Boxplot
# Gene length per chromosome, one box per Chr, coloured with the "jco"
# palette from ggsci; the legend is dropped.
box1 <- ggplot(gene_loc2,
               aes(x = Chr, y = Length, group = Chr, color = Chr)) +
  geom_boxplot() +
  xlab("Chr") +
  theme_bw() +
  scale_color_jco() +
  theme(legend.position = "none") # remove legend
#box1 + scale_fill_discrete(name = "Chromosome") #rename legend
ml_scatter <- ggscatter(gene_loc2, x = "log_EndStart", y = "log_length",
color = "Chr", palette = "jco",
add = "reg.line", add.params = list(color = "black"), # customize regression line
fullrange = TRUE) +
facet_wrap(~Chr) +
stat_cor(label.y = 4.4) +
stat_regline_equation(label.y = 4.2)
ml_scatter
ml_scatter <- ml_scatter + facet_grid(cols = vars(Chr))
# Arranging the boxplot and the scatterplot on one page
## We'll use the function ggarrange() [in ggpubr], which is a wrapper around the function plot_grid() [in the cowplot package].
## Compared to the standard function plot_grid(), ggarrange() can arrange multiple ggplots over multiple pages.
## Here: regression panels on top (A), boxplot below (B), in a single column.
fig <- ggarrange(ml_scatter, box1 + rremove ("x.text"), # rremove("x.text") drops the boxplot's x-axis tick labels
labels = c("A", "B"),
ncol =1, nrow = 2,
common.legend = TRUE)
fig
annotate_figure(fig,
top = text_grob("Visualizing gene length", color = "red", face = "bold", size = 14), #grob -- graphical object
bottom = text_grob("Data source: \n gse69360 data set", color = "blue", #customize text
hjust = 1, x = 1, face = "italic", size = 10), # horizontal justification: 0= left align, 1= right align
left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90), # rot = angle to rotate the text
right = "I'm done, thanks :-)!",
fig.lab = "Figure 1", fig.lab.face = "bold"
)
Adding descriptive text
Text to be added
text <- paste("gse69360 data set is a resource of ribosomal",
"RNA-depleted RNA-Seq data from different normal",
"adults and fetal human tissues. The dataset was first",
"published in https://www.nature.com/articles/sdata201563", sep = " ")
text.p <- ggparagraph(text = text, face = "italic", size = 11, color = "black")
# Stack three panels in one column: regression panels (A), boxplot (B) and
# the descriptive text paragraph (unlabelled).
# NOTE(review): rremove("x.text") is applied to the text paragraph here,
# whereas the earlier two-panel figure applied it to box1 — confirm which
# panel was meant to lose its x-axis text.
final_plt <- ggarrange(ml_scatter, box1, text.p + rremove("x.text"),
                       labels = c("A", "B"),
                       ncol = 1, nrow = 3,
                       common.legend = TRUE)
final_plt
# Pass the plot explicitly: without `plot =`, ggsave() saves whatever
# last_plot() currently returns, which changes as soon as another plot is
# created or displayed in between.
ggsave("mean_lth_viz.pdf", plot = final_plt,
       width = 5, height = 5, units = "in")
Place scatterplot together with density plot
Scatter plot colored by groups (“Chr”)
sp <- gene_loc2 %>%
ggscatter("log_EndStart", "log_length",
color = "Chr", palette = "jco",
size = 1, alpha = 0.2)+
border()
xplot <- gene_loc2 %>%
ggdensity("log_EndStart", fill = "Chr", palette = "jco")
yplot <- gene_loc2 %>%
ggdensity("log_length", fill = "Chr", palette = "jco")+
rotate()
yplot <- yplot + clean_theme()
xplot <- xplot + clean_theme()
fig_2 <- ggarrange(xplot, NULL, sp, yplot,
ncol = 2, nrow = 2, align = "hv",
widths = c(2, 1), heights = c(1, 2),
common.legend = TRUE)
scat_density_plt <- annotate_figure(fig_2,
top = text_grob("Scatter plot with marginal density", color = "red", face = "bold", size = 14), #grob -- graphical object
bottom = text_grob("Data source: \n gse69360 data set", color = "blue", #customize text
hjust = 1, x = 1, face = "italic", size = 10), # horizontal justification: 0= left align, 1= right align
left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90), # rot = angle to rotate the text
right = "I'm done, thanks :-)!",
fig.lab = "Figure 1", fig.lab.face = "bold"
)
# Show the annotated scatter/density figure, then write it to a PDF.
scat_density_plt
# `plot =` names the figure to save instead of relying on last_plot().
ggsave("marginal_distribution.pdf", plot = scat_density_plt,
       width = 5, height = 5, units = "in")
Insert a table into a plot
Regression plot summary data with labels & confidence interval made in part 2 using code below.
a <- gene_loc %>%
group_by(Chr) %>%
summarize(meanLength = mean(Length), numGenes = n())
head(a)
## # A tibble: 6 × 3
## Chr meanLength numGenes
## <chr> <dbl> <int>
## 1 chr1 2258. 5363
## 2 chr10 2160. 2260
## 3 chr11 2218. 3208
## 4 chr12 2342. 2818
## 5 chr13 1875. 1217
## 6 chr14 1892. 2244
scatter2 <- ggplot(a, aes(x = numGenes, y = meanLength)) +
geom_point()+
theme_bw()+
geom_text_repel(aes(label = Chr), color="red", segment.color="blue")+
geom_smooth(method = loess, color = "lightblue", alpha = 0.1)
scatter2
## `geom_smooth()` using formula = 'y ~ x'
# Descriptive statistics of Length for each chromosome in the subset.
stable <- gene_loc2 %>%
desc_statby(measure.var = "Length", # column containing the variable to be summarized
grps = "Chr") # grouping column
# keep only the group, the "length" column (presumably the number of
# observations per group — confirm against the desc_statby() docs), mean and sd
stable <- stable[, c("Chr", "length", "mean", "sd")]
stable.p <- ggtexttable(stable, rows = NULL,
theme = ttheme("mBlue"))
fig_3 <- ggarrange(scatter2, stable.p, text.p + rremove ("x.text"),
ncol =1, nrow = 3,
heights = c(1, 0.5, 0.2),
common.legend = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
fig_3
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
fig_4 <- scatter2 + annotation_custom(ggplotGrob(stable.p),
xmin = 2000, ymax = 1500,
xmax = 5000)
final_scatter2 <- annotate_figure(fig_4,
top = text_grob("Scatter plot with summary table", color = "red", face = "bold", size = 14), #grob -- graphical object
bottom = text_grob("Data source: \n gse69360 data set", color = "blue", #customize text
hjust = 1, x = 1, face = "italic", size = 10), # horizontal justification: 0= left align, 1= right align
left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90), # rot = angle to rotate the text
right = "I'm done, thanks :-)!",
fig.lab = "Figure 1", fig.lab.face = "bold"
)
## `geom_smooth()` using formula = 'y ~ x'
# Show the annotated scatter plot with its embedded summary table.
final_scatter2
# Export plot
# `plot =` names the figure to save instead of relying on last_plot().
ggsave("mean_length.pdf", plot = final_scatter2,
       width = 5, height = 5, units = "in")
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps